home *** CD-ROM | disk | FTP | other *** search
- opt nomd,mex
- page 132,60,1,1
- ;*******************************************
- ;Motorola Austin DSP Operation June 30,1988
- ;*******************************************
- ;DSP96002
- ;Peripheral to Memory FFT - 1024 point
- ;File name: F-96.
- ;**************************************************************************
- ; Maximum sample rate: 954 us at 27.0 MHz
- ; Memory Size: Prog: 317 words ; Data: 6146 words
- ; Number of clock cycles: 25760 (12880 instruction cycles)
- ; Clock Frequency: 27.0MHz
- ; Instruction cycle time: 74.1ns
- ;**************************************************************************
-
- fftreal2 macro points,data,odata,coef,ptr1,ptr2,ad
- fftreal2 ident 1,2
- ;
- ; Radix 2 Decimation in Time In-Place Fast Fourier Transform Routine
- ;
- ; Real input data - normally ordered
- ; Real data in Y memory
- ; Complex output data - normally ordered
- ; Real data in X memory
- ; Imaginary data in Y memory
- ; Coefficient lookup table
- ; -Cosine value in X memory
- ; -Sine value in Y memory
- ;
- ; Macro Call - fftreal2 points,data,odata,coef,ptr1,ptr2,ad
- ;
- ; points number of points (2-32768, power of 2)
- ; data start of input data buffer (2*points words: two blocks of points)
- ; odata output data buffer
- ; coef start of sine/cosine table
- ; ptr1 address of pointer to input data block 1
- ; ptr2 address of pointer to input data block 2
- ; ad address of memory-mapped a/d
- ;
- ; The body also references the symbol coefsize, which is not a macro
- ; argument and has to be defined outside this macro.
- ;
- ; dma set-up: a/d samples are collected into the 2*points-word area at data;
- ; ptr1 and ptr2 start out holding the addresses of its two halves
- move #data,x:ptr1 ;ptr1 = first block of the input area
- move #data+points,x:ptr2 ;ptr2 = second block of the input area
- move #ad,x:$ffffffde ;a/d memory location into dma source reg.
- move #0,x:$ffffffdd ;no offset for source reg.
- move #2*points-1,x:$ffffffdb ;modulo 2*points for input data collection
- move #1,x:$ffffffd9 ;offset = 1 for data storage
- move #data,x:$ffffffda ;input data base address
- move #points,x:$ffffffdc ;dma counter = points (1024 for a 1024-point fft)
- move #$80008036,x:$ffffffd8 ;start dma in block mode
- ;
- ; test if dma ready, if so restart it and switch buffer pointers
- ; (strt is also the target of the jmp that follows each completed fft)
- strt jclr #28,x:$ffffffd8,strt ;spin here while bit 28 is clear (dma not done)
- move #points,x:$ffffffdc ;dma done: reload the dma counter with points
- move #$80008036,x:$ffffffd8 ;restart dma with the same control word
- move ptr1,d1.l ;swap buffer pointers: old ptr1 -> d1.l
- move ptr2,d1.m ;old ptr2 -> d1.m
- move d1.l,ptr2 ;ptr2 = old ptr1
- move d1.m,ptr1 ;ptr1 = old ptr2
- ;
-
- ; do fft on the block selected by ptr2
- move ptr2,r0 ;r0 = start of the block to transform
- move #points/4,n0 ;offset between input points
- move #points-1,m0 ;mod on input pointer
- move r0,r4 ;ar' pointer
- move (r0)+n0 ;point to br'
- move r0,r5 ;br' pointer
- move (r0)+n0 ;point to cr'
- move r0,r6 ;cr', ci' pointer
- move r4,r0 ;point r0 back to ar
- ;
- ; Do first and second Radix 2 FFT passes (loop runs n0 = points/4 times)
- ; ar, br, cr, dr are all read from y memory, n0 words apart
- ; y:ar' = ar + cr + br + dr
- ; x:br' = ar + cr - br - dr
- ; x:cr' = ar - cr
- ; y:ci' = dr - br
- ;
- move y:(r0)+n0,d0.s ;get ar
- move y:(r0)+n0,d1.s ;get br
- move y:(r0)+n0,d2.s ;get cr
- faddsub d0,d2 y:(r0)+n0,d3.s ;cr'=ar-cr,ar+cr,get dr
- do n0,_first2 ;do first 2 passes
- fsubr d1,d3 d0.s,x:(r6) d3.s,d4.s ;dr-br, save cr', copy dr
- fadd d1,d4 x:(r0)+,d7.s d3.s,y:(r6)+ ;dr+br, read into d7 only steps r0 by 1, save ci'
- faddsub d2,d4 y:(r0)+n0,d0.s ;(ar+cr)(-+)(dr+br), get next ar
- move d2.s,x:(r5)+ y:(r0)+n0,d1.s ;save br', get br
- move d4.s,y:(r4)+ ;save ar'
- move y:(r0)+n0,d2.s ;get cr
- faddsub d0,d2 y:(r0)+n0,d3.s ;ar-cr,ar+cr,get dr
- _first2
- ;
- ; Do next passes: the spacing in n5 starts at points/8 and is halved per pass
- ;
- move #points/8,n5 ;spacing, for 1024 spacing=128
- move ptr2,r2 ;r2 = start of the block being transformed
-
- do #@cvi(@log(points)/@log(2)-2.5),_next ;7 passes for 1024 pts
- move r2,r5 ;reset pointer to data
- move n5,n0 ;same offset
- move r5,r0 ;ar pointer
- move (r5)+n5 ;r5 = start + 1*spacing
- move r5,r4 ;br pointer
- move (r5)+n5 ;r5 = start + 2*spacing
- move r5,r1 ;ci pointer
- move (r5)+n5 ;r5 = start + 3*spacing (dr pointer)
-
- move x:(r5)+,d0.s y:(r0)+n0,d4.s ;get dr, get ar
- do n0,_inner ;n0 iterations at the current spacing
- fneg d0 y:(r0)-n0,d1.s ;ci'=-dr, get br
- faddsubr d4,d1 d0.s,y:(r1)+ ;br'=ar-br,ar'=ar+br, save ci'
- move d4.s,x:(r4)+ d1.s,y:(r0)+ ;save br', save ar'
- move x:(r5)+,d0.s y:(r0)+n0,d4.s ;get dr, get ar
- _inner
- move n5,a ;get bflys/pass
- lsr a ;/2
- move a1,n5 ;put back: spacing for the next pass
- _next
-
- ;
- ; special pass: real input (4-point), output in normal order. The 4th
- ; output point is stored as the complex conjugate of the 3rd.
- ;
- ; The four inputs are taken from the block selected by ptr2, i.e. the same
- ; block the preceding in-place passes worked on. (The pointer used to be
- ; loaded with #data, which is only that block on every other frame, because
- ; ptr1 and ptr2 are swapped after each dma transfer.)
- ;
- move ptr2,r0 ;input pointer = block just processed
- move #odata,r4 ;output pointer
- move #points/2,n4 ;output step, used with the bit-reversed modifier
- move #0,m4 ;bit reverse output
-
- move y:(r0)+,d0.s ;get ar
- move y:(r0)+,d4.s ;get br
- faddsubr d0,d4 x:(r0)+,d1.s ;combine ar and br, get cr
- move d4.s,y:(r4)+n4 ;save ar'
- move x:(r0)+,d2.s ;get dr
- fneg d2 d2.s,d5.s d0.s,x:(r4)+n4 ;copy dr, save br'
- move d1.s,x:(r4) ;save cr'
- move d2.s,y:(r4)+n4 ;save ci'
- move d1.s,x:(r4) ;save cr' again for the conjugate point
- move d5.s,y:(r4)+n4 ;save ci'*
-
-
- ;
- ;Do first (2-point) complex fft (this has one "last pass" only) with normally ordered
- ;output data and conjugate reverse storage of the next two points
- ;
- move #4,n2 ;offset for input data pointer
- move r4,n7 ;initialize output data pointer
- move (r2)+n2 ;adjust input data pointer
- move #0,m6 ;bit-reversed addressing for r6
- move #coefsize/2,n6 ;table step for r6; coefsize is not a macro argument - assumed defined elsewhere, TODO confirm
-
- ; last pass (single butterfly, no do loop in this section)
- ;
- move r2,r0 ;Point to input data block
- move n7,r4 ;r4 points to A output
- move #2*odata+points,r3 ;initialization of conjugate reverse pointer
- move #-1,m3 ;r3 (conjugate reverse pointer) decrements linearly in initialization
- move r4,n3 ;offset for conjugate pointer initialization
- lea (r0)+,r1 ;r1 points to B input
- move (r3)-n3 ;r3 now has end of next output data block
- move #0,m3 ;r3 will decrement bit-reversed in output storage
- move #points/2,n3 ;correct offset for reverse counter
- move #2,n0 ;offset is 2 for A input pointer
- lea (r3)+n3,r6 ;r6 now contains the starting address of the next output block
- move r6,n7 ;n7 contains next output data block starting address
- move #coef+coefsize/4,r6 ;r6 points to twiddle factors
- move n0,n1 ;offset is 2 for B input pointer
- move #points/4,n4 ;offset is #points/4 for A output pointer
- move n4,n5 ;offset is #points/4 for B output pointer
- move #0,m4 ;bit reversed addressing for A output pointer
- lea (r4)+n4,r5 ;r5 points to B output by adding points/4 once----|
- move m4,m5 ;bit reversed addressing for B output pointer |
- move (r5)+n5 ;and once more to odata <-----|
- fmove x:(r6)+n6,d9.s y:(),d8.s ;fetch twiddle pair: x word into d9, y word into d8
- fmove y:(r1),d7.s ;d7 = y word of B input
- fmpy d8,d7,d3 x:(r1)+n1,d6.s ;d6 = x word of B input, step r1 by n1
- fmpy d9,d6,d0 ;
- fmpy d9,d7,d1 y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s ; Last groups are implemented as
- fmove x:(r6)+n6,d9.s y:(),d8.s ; single butterflies with storage to
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ; the output data buffer. Data is stored
- ; in normal order, and the complex conjugate is
- ; stored in the next output block using the
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) ; conjugate reverse counter r3
- fmove d0.s,x:(r3)-n3 y:(r0)+n0,d5.s ;
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;
- fmove d4.s,x:(r3)+n3 y:(r6),d8.s ;
- fadd d3,d0 x:(r0),d4.s d5.s,y:(r4)+n4 ;
- fneg d5 d2.s,y:(r5)+n5 ;
- fneg d2 x:(r6)+n6,d9.s d5.s,y:(r3)-n3 ;negated y words go to the conjugate block via r3
- fmpy d8,d6,d2 d2.s,y:(r3)-n3 ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ;
- _end_last ;NOTE(review): no do in this section targets this label, and _end_last is defined again further down - confirm the assembler accepts it
-
- ;
- ;Do second (four-point) complex fft (this has a "next to last" and "last pass"
- ;only with normal output storage and conjugate reverse storage of the next four
- ;points)
- ;
- move (r2)+n2 ;advance r2 by n2 to the next input data block
- ;
- ; next to last pass (one group, written out without a do loop)
- ;
- move #1,n2 ;initialize number of groups
- move r2,r0 ;point to input data block
- move r0,r4 ;A output = A input (in place)
- lea (r0)+2,r1 ;r1 = B input, 2 words past A
- move r1,r5 ;B output = B input (in place)
- move #coef+coefsize/4,r6 ;r6 points into the coefficient table
- move #3,n0 ;offset 3 for A input pointer
- move n0,n1 ;same offset for B input pointer
- move n0,n4 ;same offset for A output pointer
- move n0,n5 ;same offset for B output pointer
- fmove x:(r6)+n6,d9.s y:(),d8.s ;fetch twiddle pair: x word into d9, y word into d8
- fmove y:(r1),d7.s ;d7 = y word of B input
- fmpy d8,d7,d3 x:(r1)+,d6.s ;d6 = x word of B input, step r1 by 1
- fmpy d9,d6,d0 ;
- fmpy d9,d7,d1 y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ;
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) y:(r0)+,d5.s ;
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;each group implemented as one
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s d2.s,y:(r5)+ ;four-point butterfly
- fmove x:(r6)+n6,d9.s y:(),d8.s ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+,d6.s d5.s,y:(r4)+ ;
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) y:(r0)+n0,d5.s
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s d2.s,y:(r5)+n5
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s d5.s,y:(r4)+n4
- _end_next
- ;
- ; last pass
- ;
- move n2,d0.l ;number of groups in last pass=
- lsl d0 r2,r0 ;2*number of groups in previous pass. Point to input data block.
- move d0.l,n2 ;loop count for the do below -->n2
- move n7,r4 ;r4 points to A output
- move #2*odata+points,r3 ;initialization of conjugate reverse pointer
- move #-1,m3 ;r3 (conjugate reverse pointer) decrements linearly in initialization
- move r4,n3 ;offset for conjugate pointer initialization
- lea (r0)+,r1 ;r1 points to B input
- move (r3)-n3 ;r3 now has end of next output data block
- move #0,m3 ;r3 will decrement bit-reversed in output storage
- move #points/2,n3 ;correct offset for reverse counter
- move #2,n0 ;offset is 2 for A input pointer
- lea (r3)+n3,r6 ;r6 now contains the starting address of the next output block
- move r6,n7 ;n7 contains next output data block starting address
- move #coef+coefsize/4,r6 ;r6 points to twiddle factors
- move n0,n1 ;offset is 2 for B input pointer
- move #points/4,n4 ;offset is #points/4 for A output pointer
- move n4,n5 ;offset is #points/4 for B output pointer
- move #0,m4 ;bit reversed addressing for A output pointer
- lea (r4)+n4,r5 ;r5 points to B output by adding points/4 once----|
- move m4,m5 ;bit reversed addressing for B output pointer |
- move (r5)+n5 ;and once more to odata <-----|
- fmove x:(r6)+n6,d9.s y:(),d8.s ;
- fmove y:(r1),d7.s ;
- fmpy d8,d7,d3 x:(r1)+n1,d6.s ;
- fmpy d9,d6,d0 ;
- fmpy d9,d7,d1 y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s ; Last groups are implemented as
- fmove x:(r6)+n6,d9.s y:(),d8.s ; single butterflies with storage to
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ; the output data buffer. Data is stored
- ; in normal order, and the complex conjugate is
- do n2,_end_last ; stored in the next output block using the
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) ; conjugate reverse counter r3
- fmove d0.s,x:(r3)-n3 y:(r0)+n0,d5.s ;
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;
- fmove d4.s,x:(r3)+n3 y:(r6),d8.s ;
- fadd d3,d0 x:(r0),d4.s d5.s,y:(r4)+n4 ;
- fneg d5 d2.s,y:(r5)+n5 ;
- fneg d2 x:(r6)+n6,d9.s d5.s,y:(r3)-n3 ;negated y words go to the conjugate block via r3
- fmpy d8,d6,d2 d2.s,y:(r3)-n3 ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ;
- _end_last
- ;
- ;Do all remaining complex fft's (starting with 8-point) with normally ordered
- ;output storage and conjugate reverse storage of the next output block
- ;
- clr d9 ;initialize the # passes; NOTE(review): d9 is also loaded by the fmove x:(r6)+n6,d9.s lines below - confirm the count survives
- move #-1,m2 ;linear addressing for input data block pointer
- move #8,n2 ;offset=8 for input data ptr.
- move #4,d8 ;initialize number of FFT points; NOTE(review): d8 is also loaded by the fmove ...,d8.s lines below - confirm
- move (r2)+n2 ;initialize r2
- do #@cvi(@log(points)/@log(2)-3.5),_endfft ;do for all fft's. Ex.: 6 for 1024-pt.
-
- lsl d8 ;new number of FFT pts = number of FFT points * 2
- inc d9 ;increment # passes
- clr d2 d8.l,d1.l ;number of FFT points -->d1
- move d2.l,m6 ;bit-reversed addr. for coef. ptr
- move #1,d0.l ;initialize # groups
- move d8.l,n2 ;offset for computing new input data block ptr.
- move (r2)+n2 ;point to next input data block
-
- do d9.l,_end_pass ;do for all passes
- move d0.l,n2 ;load number of groups
- move r2,r0 ;point to data
- lsr d1 #coef+coefsize/4,r6 ;# butterflies per group/2,r6 points to first coeff.
- dec d1 d1.l,n0 ;decrement number of butterflies twice
- dec d1 d1.l,n1 ;(first two butterflies are done separately)
- move d1.l,n3 ;number of butterflies per group-->n3
- move n0,n4 ;initialize pointers and pointer offsets
- move n0,n5 ;
- lea (r0)+n0,r1 ;r1 = B input, n0 words past A
- move r0,r4 ;A output = A input (in place)
- move r1,r5 ;B output = B input (in place)
- fmove x:(r6)+n6,d9.s y:(),d8.s ;first two
- fmove y:(r1),d7.s ;butterflies in this
- fmpy d8,d7,d3 x:(r1)+,d6.s ;pass
- fmpy d9,d6,d0 ;
- fmpy d9,d7,d1 y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+,d6.s ;
-
- do n2,_end_grp ;do for all groups in this pass
-
- do n3,_end_bfy ;do for all butterflies in this group
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) y:(r0)+,d5.s ;4-instruction butterfly with
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;constant twiddle
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s d2.s,y:(r5)+ ;factor
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+,d6.s d5.s,y:(r4)+ ;
- _end_bfy
- move (r1)+n1 ;first two
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) y:(r0)+,d5.s ;butterflies in next
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;pass
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s d2.s,y:(r5)+ ;
- fmove x:(r6)+n6,d9.s y:(),d8.s ;fetch the next twiddle pair
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+,d6.s d5.s,y:(r4)+ ;
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) y:(r0)+n0,d5.s ;
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s d2.s,y:(r5)+n5 ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+,d6.s d5.s,y:(r4)+n4 ;
- _end_grp
- move n2,d0.l ;number of groups for next pass=
- lsl d0 n0,d1.l ;2*number of groups for this pass
- _end_pass
- ;
- ; next to last pass
- ;
- move d0.l,n2 ;initialize number of groups
- move r2,r0 ;point to input data block
- move r0,r4 ;A output = A input (in place)
- lea (r0)+2,r1 ;r1 = B input, 2 words past A
- move r1,r5 ;B output = B input (in place)
- move #coef+points/4,r6 ;NOTE(review): earlier sections use coef+coefsize/4 here - confirm points and coefsize agree
- move #3,n0 ;offset 3 for A input pointer
- move n0,n1 ;same offset for B input pointer
- move n0,n4 ;same offset for A output pointer
- move n0,n5 ;same offset for B output pointer
- fmove x:(r6)+n6,d9.s y:(),d8.s ;fetch twiddle pair: x word into d9, y word into d8
- fmove y:(r1),d7.s ;d7 = y word of B input
- fmpy d8,d7,d3 x:(r1)+,d6.s ;d6 = x word of B input, step r1 by 1
- fmpy d9,d6,d0 ;
- fmpy d9,d7,d1 y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ;
-
- do n2,_end_next ;do for all groups
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) y:(r0)+,d5.s ;
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;each group implemented as one
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s d2.s,y:(r5)+ ;four-point butterfly
- fmove x:(r6)+n6,d9.s y:(),d8.s ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+,d6.s d5.s,y:(r4)+ ;
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) y:(r0)+n0,d5.s
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s d2.s,y:(r5)+n5
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s d5.s,y:(r4)+n4
- _end_next
- ;
- ; last pass
- ;
- move n2,d0.l ;number of groups in last pass=
- lsl d0 r2,r0 ;2*number of groups in previous pass. Point to input data block.
- move d0.l,n2 ;loop count for the do below -->n2
- move n7,r4 ;r4 points to A output
- move #2*odata+points,r3 ;initialization of conjugate reverse pointer
- move #-1,m3 ;r3 (conjugate reverse pointer) decrements linearly in initialization
- move r4,n3 ;offset for conjugate pointer initialization
- lea (r0)+,r1 ;r1 points to B input
- move (r3)-n3 ;r3 now has end of next output data block
- move #0,m3 ;r3 will decrement bit-reversed in output storage
- move #points/2,n3 ;correct offset for reverse counter
- move #2,n0 ;offset is 2 for A input pointer
- lea (r3)+n3,r6 ;r6 now contains the starting address of the next output block
- move r6,n7 ;n7 contains next output data block starting address
- move #coef+points/4,r6 ;r6 points to twiddle factors; NOTE(review): earlier sections use coef+coefsize/4 - confirm
- move n0,n1 ;offset is 2 for B input pointer
- move #points/4,n4 ;offset is #points/4 for A output pointer
- move n4,n5 ;offset is #points/4 for B output pointer
- move #0,m4 ;bit reversed addressing for A output pointer
- lea (r4)+n4,r5 ;r5 points to B output by adding points/4 once----|
- move m4,m5 ;bit reversed addressing for B output pointer |
- move (r5)+n5 ;and once more to odata <-----|
- fmove x:(r6)+n6,d9.s y:(),d8.s ;fetch twiddle pair: x word into d9, y word into d8
- fmove y:(r1),d7.s ;d7 = y word of B input
- fmpy d8,d7,d3 x:(r1)+n1,d6.s ;d6 = x word of B input, step r1 by n1
- fmpy d9,d6,d0 ;
- fmpy d9,d7,d1 y:(r1),d7.s ;
- fmpy d8,d6,d2 fadd d3,d0 x:(r0),d4.s ; Last groups are implemented as
- fmove x:(r6)+n6,d9.s y:(),d8.s ; single butterflies with storage to
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ; the output data buffer. Data is stored
- ; in normal order, and the complex conjugate is
- do n2,_end_last ; stored in the next output block using the
- fmpy d9,d6,d0 fsub d1,d2 d0.s,x:(r4) ; conjugate reverse counter r3
- fmove d0.s,x:(r3)-n3 y:(r0)+n0,d5.s ;
- fmpy d9,d7,d1 faddsubr d5,d2 d4.s,x:(r5) y:(r1),d7.s ;
- fmove d4.s,x:(r3)+n3 y:(r6),d8.s ;
- fadd d3,d0 x:(r0),d4.s d5.s,y:(r4)+n4 ;
- fneg d5 d2.s,y:(r5)+n5 ;
- fneg d2 x:(r6)+n6,d9.s d5.s,y:(r3)-n3 ;negated y words go to the conjugate block via r3
- fmpy d8,d6,d2 d2.s,y:(r3)-n3 ;
- fmpy d8,d7,d3 faddsubr d4,d0 x:(r1)+n1,d6.s ;
- _end_last ;NOTE(review): _end_last and _endfft mark the same address, so both do loops end together - confirm this is permitted
- _endfft
- jmp strt ;go back, wait for the next dma block and start new fft
-